library(tidyverse)
library(tidycensus)
library(tigris)
library(sf)
# .gitignore !
# api_key <- read_file("week 3/api_key.txt") %>% trimws()
# census_api_key(key=api_key, install = TRUE)tidycensus
Tidycensus
Setting the key
Getting Decennial
dhc_20 <- tidycensus::load_variables(2020, "dhc", cache = TRUE)
dhc_20# A tibble: 9,067 × 3
name label concept
<chr> <chr> <chr>
1 H10_001N " !!Total:" TENURE…
2 H10_002N " !!Total:!!Owner occupied:" TENURE…
3 H10_003N " !!Total:!!Owner occupied:!!Householder who is White alone" TENURE…
4 H10_004N " !!Total:!!Owner occupied:!!Householder who is Black or Af… TENURE…
5 H10_005N " !!Total:!!Owner occupied:!!Householder who is American In… TENURE…
6 H10_006N " !!Total:!!Owner occupied:!!Householder who is Asian alone" TENURE…
7 H10_007N " !!Total:!!Owner occupied:!!Householder who is Native Hawa… TENURE…
8 H10_008N " !!Total:!!Owner occupied:!!Householder who is Some Other … TENURE…
9 H10_009N " !!Total:!!Owner occupied:!!Householder who is Two or More… TENURE…
10 H10_010N " !!Total:!!Renter occupied:" TENURE…
# ℹ 9,057 more rows
dhc_20 %>%
mutate(concept_trim = str_remove(concept, "\\(.*\\)")) %>%
group_by(concept_trim) %>% summarise(n = n()) %>% arrange(desc(n))# A tibble: 76 × 2
concept_trim n
<chr> <int>
1 "SEX BY SINGLE-YEAR AGE " 3135
2 "SEX BY AGE FOR SELECTED AGE CATEGORIES " 1666
3 "GROUP QUARTERS POPULATION BY SEX BY AGE BY MAJOR GROUP QUARTERS TYPE " 567
4 "HOUSEHOLD TYPE " 477
5 "SEX BY AGE FOR THE POPULATION IN HOUSEHOLDS " 441
6 "SEX BY SINGLE-YEAR AGE" 209
7 "GROUP QUARTERS POPULATION BY SEX BY AGE BY GROUP QUARTERS TYPE" 195
8 "TENURE BY AGE OF HOUSEHOLDER " 189
9 "FAMILY TYPE BY PRESENCE AND AGE OF OWN CHILDREN " 180
10 "TENURE BY HOUSEHOLD SIZE " 153
# ℹ 66 more rows
dhc_20 %>% filter(str_detect(concept, "POP")) %>%
mutate(changed_label = str_trim(str_remove_all(label, "[!:]"))) %>%
filter(changed_label == "Total")# A tibble: 39 × 4
name label concept changed_label
<chr> <chr> <chr> <chr>
1 H8_001N " !!Total" TOTAL POPULATION IN OCCUPIED HOUSING U… Total
2 P10_001N " !!Total:" RACE FOR THE POPULATION 18 YEARS AND O… Total
3 P11_001N " !!Total:" HISPANIC OR LATINO, AND NOT HISPANIC O… Total
4 P14_001N " !!Total:" SEX BY AGE FOR THE POPULATION UNDER 20… Total
5 P15_001N " !!Total:" POPULATION IN HOUSEHOLDS BY AGE Total
6 P18_001N " !!Total:" GROUP QUARTERS POPULATION BY SEX BY AG… Total
7 P1_001N " !!Total" TOTAL POPULATION Total
8 PCO1_001N " !!Total:" GROUP QUARTERS POPULATION BY SEX BY AGE Total
9 PCT13A_001N " !!Total:" SEX BY AGE FOR THE POPULATION IN HOUSE… Total
10 PCT13B_001N " !!Total:" SEX BY AGE FOR THE POPULATION IN HOUSE… Total
# ℹ 29 more rows
dhc_20 %>% filter(str_starts(name, "P1_")) %>% arrange(name)# A tibble: 1 × 3
name label concept
<chr> <chr> <chr>
1 P1_001N " !!Total" TOTAL POPULATION
total_pop <- tidycensus::get_decennial(geography = "county", variables = "P1_001N")Getting data from the 2020 decennial Census
Using the PL 94-171 Redistricting Data Summary File
Note: 2020 decennial Census data use differential privacy, a technique that
introduces errors into data to preserve respondent confidentiality.
ℹ Small counts should be interpreted with caution.
ℹ See https://www.census.gov/library/fact-sheets/2021/protecting-the-confidentiality-of-the-2020-census-redistricting-data.html for additional guidance.
This message is displayed once per session.
total_pop# A tibble: 3,221 × 4
GEOID NAME variable value
<chr> <chr> <chr> <dbl>
1 01001 Autauga County, Alabama P1_001N 58805
2 01003 Baldwin County, Alabama P1_001N 231767
3 01005 Barbour County, Alabama P1_001N 25223
4 01007 Bibb County, Alabama P1_001N 22293
5 01009 Blount County, Alabama P1_001N 59134
6 01011 Bullock County, Alabama P1_001N 10357
7 01013 Butler County, Alabama P1_001N 19051
8 01015 Calhoun County, Alabama P1_001N 116441
9 21135 Lewis County, Kentucky P1_001N 13080
10 21137 Lincoln County, Kentucky P1_001N 24275
# ℹ 3,211 more rows
transport_vars <- tidycensus::load_variables(year=2020, dataset = "acs5", cache = TRUE) %>%
filter(str_starts(name, "B08301")) %>%
filter(str_count(label, "\\!\\!") == 2) %>% select(name, label)transport_pop <- tidycensus::get_acs(geography = "county", variables = transport_vars$name, year=2020) %>%
left_join(transport_vars, join_by(variable == name))Getting data from the 2016-2020 5-year ACS
transport_pop# A tibble: 25,768 × 6
GEOID NAME variable estimate moe label
<chr> <chr> <chr> <dbl> <dbl> <chr>
1 01001 Autauga County, Alabama B08301_002 23401 847 Estimate!!Total:!!Ca…
2 01001 Autauga County, Alabama B08301_010 109 106 Estimate!!Total:!!Pu…
3 01001 Autauga County, Alabama B08301_016 0 29 Estimate!!Total:!!Ta…
4 01001 Autauga County, Alabama B08301_017 43 48 Estimate!!Total:!!Mo…
5 01001 Autauga County, Alabama B08301_018 68 113 Estimate!!Total:!!Bi…
6 01001 Autauga County, Alabama B08301_019 183 132 Estimate!!Total:!!Wa…
7 01001 Autauga County, Alabama B08301_020 191 150 Estimate!!Total:!!Ot…
8 01001 Autauga County, Alabama B08301_021 954 250 Estimate!!Total:!!Wo…
9 01003 Baldwin County, Alabama B08301_002 88321 2037 Estimate!!Total:!!Ca…
10 01003 Baldwin County, Alabama B08301_010 30 35 Estimate!!Total:!!Pu…
# ℹ 25,758 more rows
transport_pop %>% group_by(variable) %>% slice_max(order_by = estimate, n = 1) # A tibble: 8 × 6
# Groups: variable [8]
GEOID NAME variable estimate moe label
<chr> <chr> <chr> <dbl> <dbl> <chr>
1 06037 Los Angeles County, California B08301_002 3902247 9941 Estimate!!Tota…
2 36047 Kings County, New York B08301_010 682380 6289 Estimate!!Tota…
3 36061 New York County, New York B08301_016 20968 1547 Estimate!!Tota…
4 06037 Los Angeles County, California B08301_017 10861 778 Estimate!!Tota…
5 06037 Los Angeles County, California B08301_018 32169 1479 Estimate!!Tota…
6 36061 New York County, New York B08301_019 172404 4397 Estimate!!Tota…
7 06037 Los Angeles County, California B08301_020 54593 2292 Estimate!!Tota…
8 06037 Los Angeles County, California B08301_021 384448 5604 Estimate!!Tota…
transport_pop %>%
left_join(total_pop, join_by(GEOID), suffix = c("_transport", "_pop")) %>%
# filter(value > 1e4) %>%
mutate(pop_perc = estimate/value) %>%
group_by(variable_transport) %>%
slice_max(order_by = pop_perc, n = 1) # A tibble: 8 × 10
# Groups: variable_transport [8]
GEOID NAME_transport variable_transport estimate moe label NAME_pop
<chr> <chr> <chr> <dbl> <dbl> <chr> <chr>
1 15005 Kalawao County, Hawaii B08301_002 399 182 Esti… Kalawao…
2 36061 New York County, New Y… B08301_010 480245 5719 Esti… New Yor…
3 02150 Kodiak Island Borough,… B08301_016 459 229 Esti… Kodiak …
4 13101 Echols County, Georgia B08301_017 60 63 Esti… Echols …
5 15005 Kalawao County, Hawaii B08301_018 6 4 Esti… Kalawao…
6 02068 Denali Borough, Alaska B08301_019 918 1072 Esti… Denali …
7 02158 Kusilvak Census Area, … B08301_020 1004 94 Esti… Kusilva…
8 46063 Harding County, South … B08301_021 252 90 Esti… Harding…
# ℹ 3 more variables: variable_pop <chr>, value <dbl>, pop_perc <dbl>
counties_tigris <- tigris::counties(year = 2020, cb=TRUE, resolution = "20m", progress_bar=FALSE)
counties_tigrisSimple feature collection with 3221 features and 12 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
Geodetic CRS: NAD83
First 10 features:
STATEFP COUNTYFP COUNTYNS AFFGEOID GEOID NAME NAMELSAD
1 01 061 00161556 0500000US01061 01061 Geneva Geneva County
2 08 125 00198178 0500000US08125 08125 Yuma Yuma County
3 17 177 01785076 0500000US17177 17177 Stephenson Stephenson County
4 28 153 00695797 0500000US28153 28153 Wayne Wayne County
5 34 041 00882237 0500000US34041 34041 Warren Warren County
6 46 051 01265782 0500000US46051 46051 Grant Grant County
7 51 013 01480097 0500000US51013 51013 Arlington Arlington County
8 21 007 00516850 0500000US21007 21007 Ballard Ballard County
9 37 091 01026127 0500000US37091 37091 Hertford Hertford County
10 13 057 01685718 0500000US13057 13057 Cherokee Cherokee County
STUSPS STATE_NAME LSAD ALAND AWATER
1 AL Alabama 06 1487908432 11567409
2 CO Colorado 06 6123763559 11134665
3 IL Illinois 06 1461392061 1350223
4 MS Mississippi 06 2099745602 7255476
5 NJ New Jersey 06 923435921 15822933
6 SD South Dakota 06 1764937243 15765681
7 VA Virginia 06 67332990 273562
8 KY Kentucky 06 639538865 70070117
9 NC North Carolina 06 914689325 18737418
10 GA Georgia 06 1090597331 34427734
geometry
1 MULTIPOLYGON (((-86.19348 3...
2 MULTIPOLYGON (((-102.8038 4...
3 MULTIPOLYGON (((-89.92647 4...
4 MULTIPOLYGON (((-88.94335 3...
5 MULTIPOLYGON (((-75.19261 4...
6 MULTIPOLYGON (((-97.22624 4...
7 MULTIPOLYGON (((-77.17228 3...
8 MULTIPOLYGON (((-89.16809 3...
9 MULTIPOLYGON (((-77.20847 3...
10 MULTIPOLYGON (((-84.64429 3...
ggplot(data = counties_tigris) + geom_sf()
counties_tigris <- tigris::shift_geometry(counties_tigris)
counties_tigrisSimple feature collection with 3221 features and 12 fields
Geometry type: GEOMETRY
Dimension: XY
Bounding box: xmin: -3112200 ymin: -1697728 xmax: 2258154 ymax: 1558935
Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
First 10 features:
STATEFP COUNTYFP COUNTYNS AFFGEOID GEOID NAME NAMELSAD
1 01 061 00161556 0500000US01061 01061 Geneva Geneva County
2 08 125 00198178 0500000US08125 08125 Yuma Yuma County
3 17 177 01785076 0500000US17177 17177 Stephenson Stephenson County
4 28 153 00695797 0500000US28153 28153 Wayne Wayne County
5 34 041 00882237 0500000US34041 34041 Warren Warren County
6 46 051 01265782 0500000US46051 46051 Grant Grant County
7 51 013 01480097 0500000US51013 51013 Arlington Arlington County
8 21 007 00516850 0500000US21007 21007 Ballard Ballard County
9 37 091 01026127 0500000US37091 37091 Hertford Hertford County
10 13 057 01685718 0500000US13057 13057 Cherokee Cherokee County
STUSPS STATE_NAME LSAD ALAND AWATER
1 AL Alabama 06 1487908432 11567409
2 CO Colorado 06 6123763559 11134665
3 IL Illinois 06 1461392061 1350223
4 MS Mississippi 06 2099745602 7255476
5 NJ New Jersey 06 923435921 15822933
6 SD South Dakota 06 1764937243 15765681
7 VA Virginia 06 67332990 273562
8 KY Kentucky 06 639538865 70070117
9 NC North Carolina 06 914689325 18737418
10 GA Georgia 06 1090597331 34427734
geometry
1 MULTIPOLYGON (((929857.8 -6...
2 MULTIPOLYGON (((-575240.5 3...
3 MULTIPOLYGON (((495694.5 57...
4 MULTIPOLYGON (((664464.4 -6...
5 MULTIPOLYGON (((1729323 550...
6 MULTIPOLYGON (((-96128.46 8...
7 MULTIPOLYGON (((1607262 315...
8 MULTIPOLYGON (((601189.3 -2...
9 MULTIPOLYGON (((1662478 244...
10 MULTIPOLYGON (((1034040 -29...
ggplot(data = counties_tigris) + geom_sf()
transport_pop %>%
left_join(total_pop, join_by(GEOID), suffix = c("_transport", "_pop")) %>%
filter(variable_transport == "B08301_016") %>%
mutate(pop_perc = estimate/value) %>%
left_join(x = counties_tigris, y = ., join_by(GEOID)) %>%
ggplot(data = .) +
geom_sf(aes(fill = pop_perc)) +
scale_fill_gradient(low = "white", high = "darkgreen", na.value = "white", name = "Percentage of population") +
ggthemes::theme_tufte() 
transport_pop %>%
left_join(total_pop, join_by(GEOID), suffix = c("_transport", "_pop")) %>%
filter(variable_transport == "B08301_016") %>%
mutate(pop_perc = estimate/value) %>%
left_join(x = counties_tigris, y = ., join_by(GEOID)) %>%
ggplot(data = .) +
geom_sf(aes(fill = pop_perc), lwd=0.1) +
scale_fill_gradient(low = "white", high = "darkgreen",trans = "log", na.value = "white", name = "Percentage of population", labels = scales::label_percent()) +
ggthemes::theme_tufte() 
transport_pop %>%
left_join(total_pop, join_by(GEOID), suffix = c("_transport", "_pop")) %>%
# filter(variable_transport != "B08301_002") %>%
mutate(pop_perc = estimate/value) %>%
left_join(x = counties_tigris, y = ., join_by(GEOID)) %>%
ggplot(data = .) +
geom_sf(aes(fill = pop_perc), color=NA) +
scale_fill_gradient(low = "white", high = "darkgreen",trans = "log", na.value = "white", name = "Percentage of population", labels = scales::label_percent()) +
facet_wrap(vars(label)) +
ggthemes::theme_tufte() 
transport_pop %>%
left_join(total_pop, join_by(GEOID), suffix = c("_transport", "_pop")) %>%
group_by(GEOID, NAME_transport) %>%
summarise(
perc_non_car = (sum(estimate) -
sum(estimate[variable_transport != "B08301_002"])) / mean(value)
) %>%
ungroup() %>%
left_join(x = counties_tigris, y = ., join_by(GEOID)) %>%
ggplot(data = .) +
geom_sf(aes(fill = perc_non_car), color=NA) +
scale_fill_gradient(low = "white", high = "darkgreen",trans = "log", na.value = "white", name = "Percentage of population not using cars", labels = scales::label_percent())`summarise()` has grouped output by 'GEOID'. You can override using the
`.groups` argument.

transport_pop %>%
left_join(total_pop, join_by(GEOID), suffix = c("_transport", "_pop")) %>%
group_by(GEOID, NAME_transport) %>%
summarise(
perc_non_car = (sum(estimate) -
sum(estimate[variable_transport != "B08301_002"])) / mean(value)
) %>%
ungroup() %>% arrange(desc(perc_non_car))`summarise()` has grouped output by 'GEOID'. You can override using the
`.groups` argument.
# A tibble: 3,221 × 3
GEOID NAME_transport perc_non_car
<chr> <chr> <dbl>
1 15005 Kalawao County, Hawaii 4.87
2 48301 Loving County, Texas 0.938
3 48137 Edwards County, Texas 0.579
4 46117 Stanley County, South Dakota 0.549
5 08047 Gilpin County, Colorado 0.539
6 46079 Lake County, South Dakota 0.538
7 31057 Dundy County, Nebraska 0.534
8 48105 Crockett County, Texas 0.532
9 48197 Hardeman County, Texas 0.527
10 20069 Gray County, Kansas 0.526
# ℹ 3,211 more rows
total_pop %>% filter(GEOID == "15005")# A tibble: 1 × 4
GEOID NAME variable value
<chr> <chr> <chr> <dbl>
1 15005 Kalawao County, Hawaii P1_001N 82
transport_pop %>% filter(GEOID == "15005")# A tibble: 8 × 6
GEOID NAME variable estimate moe label
<chr> <chr> <chr> <dbl> <dbl> <chr>
1 15005 Kalawao County, Hawaii B08301_002 399 182 Estimate!!Total:!!Car,…
2 15005 Kalawao County, Hawaii B08301_010 1 2 Estimate!!Total:!!Publ…
3 15005 Kalawao County, Hawaii B08301_016 0 12 Estimate!!Total:!!Taxi…
4 15005 Kalawao County, Hawaii B08301_017 0 12 Estimate!!Total:!!Moto…
5 15005 Kalawao County, Hawaii B08301_018 6 4 Estimate!!Total:!!Bicy…
6 15005 Kalawao County, Hawaii B08301_019 16 8 Estimate!!Total:!!Walk…
7 15005 Kalawao County, Hawaii B08301_020 0 12 Estimate!!Total:!!Othe…
8 15005 Kalawao County, Hawaii B08301_021 2 3 Estimate!!Total:!!Work…
total_pop_acs <- get_acs(geography = "county", variables = "B01001_001E", year = 2023)Getting data from the 2019-2023 5-year ACS
total_pop_acs %>% filter(GEOID == "15005")# A tibble: 1 × 5
GEOID NAME variable estimate moe
<chr> <chr> <chr> <dbl> <dbl>
1 15005 Kalawao County, Hawaii B01001_001 43 17
transport_pop %>%
left_join(total_pop, join_by(GEOID), suffix = c("_transport", "_pop")) %>%
filter(value > 10000) %>%
group_by(GEOID, NAME_transport) %>%
summarise(
perc_non_car = (sum(estimate) -
sum(estimate[variable_transport != "B08301_002"])) / mean(value)
) %>%
ungroup() %>%
left_join(x = counties_tigris, y = ., join_by(GEOID)) %>%
ggplot(data = .) +
geom_sf(aes(fill = perc_non_car), color=NA) +
scale_fill_gradient(low = "white", high = "darkgreen", na.value = "lightgray", name = "Percentage of population not using cars", labels = scales::label_percent())`summarise()` has grouped output by 'GEOID'. You can override using the
`.groups` argument.

Other mapping stuff!
https://r-spatial.github.io/mapview/index.html
# install.packages("mapview")
library(mapview)
transport_pop %>%
left_join(total_pop, join_by(GEOID), suffix = c("_transport", "_pop")) %>%
filter(value > 10000) %>%
group_by(GEOID, NAME_transport) %>%
summarise(
perc_non_car = (sum(estimate) -
sum(estimate[variable_transport != "B08301_002"])) / mean(value)
) %>%
ungroup() %>%
left_join(x = counties_tigris, y = ., join_by(GEOID)) %>%
mapview::mapview(zcol = "perc_non_car", label = "NAME_transport")`summarise()` has grouped output by 'GEOID'. You can override using the
`.groups` argument.
https://rstudio.github.io/leaflet/
# install.packages("leaflet")
library(leaflet)
prep_data <- transport_pop %>%
left_join(total_pop, join_by(GEOID), suffix = c("_transport", "_pop")) %>%
filter(value > 10000) %>%
group_by(GEOID, NAME_transport) %>%
summarise(
perc_non_car = (sum(estimate) -
sum(estimate[variable_transport != "B08301_002"])) / mean(value)
) %>%
ungroup() %>%
left_join(x = counties_tigris, y = ., join_by(GEOID)) %>%
st_transform(crs = 4326)`summarise()` has grouped output by 'GEOID'. You can override using the
`.groups` argument.
mypalette <- colorNumeric(
palette = "viridis", domain = prep_data$perc_non_car,
na.color = "transparent"
)
leaflet(prep_data) %>%
addPolygons(
fillColor = ~ mypalette(perc_non_car),
stroke = FALSE,
fillOpacity = 0.75,
) %>%
addTiles()